## modes.r
## Vito D'Orazio
## June 16, 2014
## The mode() function in R returns the storage mode (type) of an object, not the
## statistical mode.  This script shows two ways to compute the statistical mode
## and two ways to time them.

# Directory that holds the workshop data. Edit this one line to run the script
# on another machine. The directory is joined onto the file name below instead
# of being passed to setwd(), so running this script leaves the caller's
# working directory and workspace untouched.
data_dir <- "/Users/vjdorazio/Desktop/IQSS/privacy_tools/R_workshop"

# Data set whose fifth column is used as the timing input further down.
mydata <- read.csv(file.path(data_dir, "PUMS5extract.csv"))

# Statistical mode of a vector, computed with an explicit loop.
#
# Counts the occurrences of each distinct value of `v` in turn and keeps the
# value with the highest count.
#
# Args:
#   v: an atomic vector or factor.
#
# Returns:
#   The most frequent value of `v`. On ties the value that appears first in
#   `v` wins, because a later value replaces the current best only when its
#   count is strictly greater. NA entries are never counted (`==` against NA
#   yields NA, which which() drops), so the result is NA only for a
#   zero-length or all-NA vector.
longmode <- function(v) {
    u <- unique(v)
    mymode <- u[1]
    best <- length(which(v == u[1]))

    # u[1] is already counted above, so start at the second distinct value.
    # seq_along() yields an empty sequence for empty `u`, whereas
    # 1:length(u) would count down through 1, 0.
    for (i in seq_along(u)[-1]) {
        count <- length(which(v == u[i]))
        if (count > best) {
            mymode <- u[i]
            best <- count  # reuse the count instead of recomputing it
        }
    }
    return(mymode)
}

# Statistical mode of a vector, computed without an explicit loop.
#
# Each element of `v` is mapped to the position of its value among the
# distinct values, those positions are tallied, and the distinct value with
# the largest tally is returned.
#
# Args:
#   v: a vector.
#
# Returns:
#   The most frequent value of `v`; on ties, the one seen first in `v`
#   (which.max() reports the first maximum). match() pairs NA with NA, so
#   NA is tallied like any other value.
fastmode <- function(v) {
    distinct <- unique(v)
    positions <- match(v, distinct)
    tallies <- tabulate(positions)
    distinct[which.max(tallies)]
} # what is the problem with fastmode?


# Timing, approach 1: system.time() evaluates the expression once and reports
# the user, system and elapsed time it took.
system.time(longmode(mydata[[5]]))
system.time(fastmode(mydata[[5]]))

# Timing, approach 2: take a clock reading before the call and print the
# difference from a second reading taken afterwards.
start <- Sys.time()
longmode(mydata[[5]])
print(Sys.time() - start)

start <- Sys.time()
fastmode(mydata[[5]])
print(Sys.time() - start)